clear all
close all


%==========================================================================
% MAIN ESTIMATION
%==========================================================================

%==========================================================================
% SET UP MD AND EM PARAMETERS
%==========================================================================
%------- MD ESTIMATION PARAMETERS------------------------------------------
    % All MD settings are gathered in one struct() call; fields are created
    % in the order listed below. Inside the call, everything that follows
    % an ellipsis (...) on a line is a comment.
    MDinput = struct( ...
        ... % Choose whether to produce estimation figures and statistics
        'produce_graphs',        1, ...
        'print_statistics',      1, ...
        ... % Bounds for the parameters mu and sigma
        'muL',                   0.005, ...
        'muU',                   0.65, ...
        'sigmaL',                0.01, ...
        'sigmaU',                1.5, ...
        ... % Number of grid points for mu and sigma, and whether the grid
        ... % for mu and sigma uses a log scale
        'Nmu',                   50, ...
        'Nsigma',                50, ...
        'use_log_scale',         0, ...
        ... % Choose one of the following three options:
        'use_CDF_when_possible', 0, ...  % 1 = use the difference between CDFs whenever possible
        'use_CDF_only',          1, ...  % 1 = use CDF only
        'use_pdf_only',          0, ...  % 1 = use pdf only
        ... % Threshold: truncate values of g with probability below g_low
        'g_low',                 0.000001, ...
        ... % Optimization parameters
        'lambda_sum',            exp(7), ...   % multiplier/weight for the constraint sum g = 1
        'weight_max',            exp(22), ...  % maximum weight for data point phi(t_1,t_2)
        'weight_power',          1/2.5, ...    % power in the weighting matrix
        ... % Regularization.
        ... % lambda_Tykhonov penalizes solutions g with too large a norm,
        ... % i.e. it adds lambda_Tykhonov * || g ||^2 to the objective
        ... % function.
        'lambda_Tykhonov_vec',   0.5, ...
        'lambda_g_smooth',       0.1, ...      % value of 2-sided-HP used to preselect grid points for regularization
        ... % Output name
        'filename_out',          'MD_estimates_RP.mat');

%------- EM ESTIMATION PARAMETERS------------------------------------------
    % All EM settings are gathered in one struct() call; fields are created
    % in the order listed below. Inside the call, everything that follows
    % an ellipsis (...) on a line is a comment.
    EMinput = struct( ...
        ... % Choose whether to produce estimation figures and statistics
        'produce_graphs',     1, ...
        'print_statistics',   1, ...
        ... % log-scale, Akaike
        'do_Akaike',          0, ...
        'use_Elog',           0, ...
        ... % Store past values.
        ... % The log-likelihood tends to be non-monotone and typically
        ... % stops at n_EM. This option stores the last several iterations
        ... % and chooses the one with the highest log-likelihood.
        'choose_the_highest', 1, ...
        'tokeep',             20, ...
        ... % Parameters for the EM procedure
        'mu_min',             0.001, ...     % minimum value of mu
        'sigma_min',          10^(-3), ...   % minimum value of sigma (needed so that f > 0)
        'g_min',              1e-10, ...     % minimum value of g
        'z_min',              1e-10, ...     % minimum value of z
        'n_EM',               300, ...       % maximum number of iterations in the EM algorithm
        'tol',                10^(-8), ...   % tolerance on maximum difference between iterations of EM
        'Niter_MLE',          5000, ...      % maximum number of iterations for each type k
        'toler_MLE',          10^(-7), ...   % tolerance (in percentage) to stop iterations for each type k
        ... % Other parameters
        'print_iter',         1, ...         % 1 = print messages with iteration of EM
        'MDestimates',        MDinput.filename_out, ...  % same file name as the MD output
        ... % Output name
        'filename_out',       'EM_estimates_RP.mat');

%==========================================================================
%------- DATA INPUTS AND ESTIMATION ---------------------------------------
    % Data description: maximum duration used in the estimation, the raw
    % data read from the text file, the output file name, and the
    % data-smoothing setting.
    DATAinput = struct( ...
        'T',             104, ...                          % maximum duration to be used in the estimation
        'data',          load('IG_raw_0_105_RP.txt'), ...
        'filename_out',  'estimates_RP.mat', ...
        'lambda_smooth', 1);                               % smooth the data

    % Shortest and longest spell in the data (MD step) and smallest and
    % largest t used in estimation (EM step): both run from 0 to T.
    [MDinput.TL, EMinput.TL] = deal(0);
    [MDinput.TU, EMinput.TU] = deal(DATAinput.T);
    EMinput.tstep = 0.5;

 %   output_main = fct_IGpaper_estimation_main(DATAinput,MDinput,EMinput);
%--------------------------------------------------------------------------    

close all
clc

%==========================================================================
% BOOTSTRAP
% Re-runs the full estimation on Ns resampled data sets; each run is given
% the output file name 'bootstrap_sample<k>.mat'.

    % Number of bootstrap samples
    Ns = 500;

    % Start from the original data inputs and add the resampling settings:
    % the subsample size is the sum of the third data column, and draws are
    % made with replacement.
    DATAinput_bootstrap = DATAinput;
    DATAinput_bootstrap.Nsubsample  = sum(DATAinput.data(:,3));
    DATAinput_bootstrap.replacement = 1;

    % Switch off graphs, statistics and iteration messages for the loop
    [MDinput.produce_graphs, MDinput.print_statistics] = deal(0);
    [EMinput.produce_graphs, EMinput.print_statistics, EMinput.print_iter] = deal(0);

for iboot = 1:Ns

    fprintf('Bootstrap sample: %d \n',iboot)

    %--------------------------------------------------------------------------
    % DRAW A RANDOM SAMPLE WITH REPLACEMENT
    drawn = fct_draw_subsample_data( ...
        DATAinput.data, ...
        DATAinput_bootstrap.Nsubsample, ...
        DATAinput_bootstrap.replacement);

    % Estimate on the resampled data
    DATAinput_bootstrap.data         = drawn.data;
    DATAinput_bootstrap.filename_out = sprintf('bootstrap_sample%d.mat',iboot);
    output_main = fct_IGpaper_estimation_main(DATAinput_bootstrap,MDinput,EMinput);
end

%==========================================================================
% USE BOOTSTRAPPED ESTIMATES TO PRODUCE STANDARD ERRORS
% REVERSE INTERVAL
%
% For each bootstrap sample, the eight estimated distributions listed in
% Gnames are read from 'bootstrap_sample<k>.mat'. Distributions with
% neg_share > 0.99 are discarded. Across the remaining ones, the row-wise
% minimum and maximum of HR_agg, HR_struc_multi and HR_heter_multi are
% stored. The 2.5th and 97.5th percentiles across bootstrap samples are
% then saved in 'standard_errors_RP.mat'.
% NOTE(review): assumes each Gdecomp.HR_* field has NN rows -- confirm
% against fct_IGpaper_estimation_main.

    Gnames = {'GBAR', 'GUSHORT', 'GBARW', 'GUSHORTW',...
    'GBARP2', 'GUSHORTP2', 'GBARP2W', 'GUSHORTP2W'};
    
    Gdecompnames = {'GBARdecomp', 'GUSHORTdecomp', 'GBARWdecomp', 'GUSHORTWdecomp',...
    'GBARP2decomp', 'GUSHORTP2decomp', 'GBARP2Wdecomp', 'GUSHORTP2Wdecomp'};

    nG = numel(Gnames);

    neg_share = zeros(Ns,nG);
    % round() guards against floating-point error in the division
    NN = round((EMinput.TU - EMinput.TL)/EMinput.tstep) + 1;
    HR_agg_min_vec   = zeros(NN,Ns);
    HR_agg_max_vec   = zeros(NN,Ns);
    HR_struc_min_vec = zeros(NN,Ns);
    HR_struc_max_vec = zeros(NN,Ns);
    HR_heter_min_vec = zeros(NN,Ns);
    HR_heter_max_vec = zeros(NN,Ns);

    % sample_ok(k) is false when no distribution of sample k passes the
    % neg_share filter; such samples are left out of the percentiles.
    sample_ok = true(1,Ns);
    
    for sample = 1:Ns    

        % Load into a struct so that the file contents cannot overwrite
        % script variables (sample, g, Ns, MDinput, EMinput, ...).
        S = load(sprintf('bootstrap_sample%d.mat',sample));  
        HR_agg_mat   = [];
        HR_struc_mat = [];
        HR_heter_mat = [];
        
        for g = 1:nG
            G       = S.(Gnames{g});
            neg_share(sample,g) = G.neg_share;
            Gdecomp = S.(Gdecompnames{g});
            
            if neg_share(sample,g)<=0.99
                HR_agg_mat   = [HR_agg_mat Gdecomp.HR_agg];
                HR_struc_mat = [HR_struc_mat Gdecomp.HR_struc_multi];
                HR_heter_mat = [HR_heter_mat Gdecomp.HR_heter_multi];    
            end
        end

        if isempty(HR_agg_mat)
            warning('IGpaper:bootstrap:noValidDistribution', ...
                'Bootstrap sample %d: all distributions have neg_share > 0.99; sample skipped.', ...
                sample);
            sample_ok(sample) = false;
            continue
        end

        % Reduce along dimension 2 explicitly. min(X')' would return a
        % single scalar whenever X has only one column, because X' is then
        % a row vector.
        HR_agg_min_vec(:,sample)   = min(HR_agg_mat,[],2);
        HR_agg_max_vec(:,sample)   = max(HR_agg_mat,[],2);
        HR_struc_min_vec(:,sample) = min(HR_struc_mat,[],2);
        HR_struc_max_vec(:,sample) = max(HR_struc_mat,[],2);
        HR_heter_min_vec(:,sample) = min(HR_heter_mat,[],2);
        HR_heter_max_vec(:,sample) = max(HR_heter_mat,[],2);
       
    end

    Nvalid = nnz(sample_ok);
    if Nvalid == 0
        error('IGpaper:bootstrap:noValidSample', ...
            'No bootstrap sample has a distribution with neg_share <= 0.99.');
    end

    % Sort each row (one row per grid point) across the valid samples
    HR_agg_min_sort     = round(sort(HR_agg_min_vec(:,sample_ok),2),5);
    HR_agg_max_vec_sort = round(sort(HR_agg_max_vec(:,sample_ok),2),5);
    HR_struc_min_sort   = round(sort(HR_struc_min_vec(:,sample_ok),2),5);
    HR_struc_max_sort   = round(sort(HR_struc_max_vec(:,sample_ok),2),5);
    HR_heter_min_sort   = round(sort(HR_heter_min_vec(:,sample_ok),2),5);
    HR_heter_max_sort   = round(sort(HR_heter_max_vec(:,sample_ok),2),5);

    % Column indices of the 2.5th and 97.5th percentiles
    li = max(1,round(Nvalid/100*2.5));
    ui = min(Nvalid,round(Nvalid/100*97.5));
    
    SE.HR_agg_min_ui = HR_agg_min_sort(:,ui);
    SE.HR_agg_min_li = HR_agg_min_sort(:,li);
    SE.HR_agg_max_vec_sort_ui = HR_agg_max_vec_sort(:,ui);
    SE.HR_agg_max_vec_sort_li = HR_agg_max_vec_sort(:,li);
    
    SE.HR_struc_min_sort_ui = HR_struc_min_sort(:,ui);
    SE.HR_struc_min_sort_li = HR_struc_min_sort(:,li);
    SE.HR_struc_max_sort_ui = HR_struc_max_sort(:,ui);
    SE.HR_struc_max_sort_li = HR_struc_max_sort(:,li);
    
    SE.HR_heter_min_sort_ui = HR_heter_min_sort(:,ui);
    SE.HR_heter_min_sort_li = HR_heter_min_sort(:,li);
    SE.HR_heter_max_sort_ui = HR_heter_max_sort(:,ui);
    SE.HR_heter_max_sort_li = HR_heter_max_sort(:,li);

    save('standard_errors_RP.mat','SE');

%==========================================================================
% ESTIMATION -- DIFFERENT SAMPLE SIZE
% CHOOSE THE SAME MDinput and EMinput as before
    % Data inputs for the run with maximum duration 260
    DATAinput260 = struct( ...
        'T',             260, ...
        'data',          load('IG_raw_0_261_RP.txt'), ...
        'filename_out',  'estimates_RP_T260.mat', ...
        'lambda_smooth', 1);

    % Shortest and longest spell in the data (MD step) and smallest and
    % largest t used in estimation (EM step): both run from 0 to T.
    [MDinput.TL, EMinput.TL] = deal(0);
    [MDinput.TU, EMinput.TU] = deal(DATAinput260.T);
    EMinput.tstep = 0.5;

    output_main = fct_IGpaper_estimation_main(DATAinput260,MDinput,EMinput);

%==========================================================================
% ACCURACY OF THE METHOD
% CREATE ARTIFICIAL DATA FROM THE MODEL AND RUN ESTIMATION AGAIN
%
% Three "true" type distributions are used in turn:
%   case 1 - one type,
%   case 2 - two types,
%   case 3 - the types stored as GPLUS in 'estimates_RP.mat'.
% For each case the script (i) computes the decomposition of the true
% distribution and saves it to 'accuracy_sample<k>_RP_GPLUStrue.mat',
% (ii) draws a random sample from the distribution, and (iii) runs the
% estimation on that sample with output file 'accuracy_sample<k>_RP.mat'.
% NOTE: case 3 requires 'estimates_RP.mat' to exist already (the call that
% would produce it is commented out earlier in this script).

    % Horizon shared by the estimation window, the decomposition and the
    % simulated data, so the three cannot drift apart.
    T_acc = DATAinput.T;

    % Define shortest and longest spell to have in the data
    MDinput.TL = 0;
    MDinput.TU = T_acc;
    EMinput.TL = 0;
    EMinput.TU = T_acc;
    EMinput.tstep = 1/2;
    EMinput.print_iter = 0;

    for acc_case = 1:3

        % ---- true distribution for this case -----------------------------
        GPLUStrue = struct();
        switch acc_case
            case 1  % 1 type
                GPLUStrue.g_vec     = 1;
                GPLUStrue.mu_vec    = 0.05;
                GPLUStrue.sigma_vec = 0.25;
            case 2  % 2 types
                GPLUStrue.g_vec     = [0.7;0.3];
                GPLUStrue.mu_vec    = [0.05;0.1];
                GPLUStrue.sigma_vec = [0.25;1/1.5];
            case 3  % estimated types
                % Load only GPLUS, into a struct, so that other variables
                % stored in the file cannot overwrite MDinput, EMinput or
                % DATAinput in the script workspace.
                est = load('estimates_RP.mat','GPLUS');
                GPLUStrue.g_vec     = est.GPLUS.g_vec;
                GPLUStrue.mu_vec    = est.GPLUS.mu_vec;
                GPLUStrue.sigma_vec = est.GPLUS.sigma_vec;
        end

        % ---- decomposition of the true distribution ----------------------
        DECOMPinput      = GPLUStrue;
        DECOMPinput.tmin = 0;
        DECOMPinput.tmax = T_acc;
        GPLUStruedecomp  = fct_decomposition_increments(DECOMPinput);
        % The saved variable names (GPLUStrue, GPLUStruedecomp) are kept
        % unchanged for any code that reads these files.
        save(sprintf('accuracy_sample%d_RP_GPLUStrue.mat',acc_case), ...
            'GPLUStrue','GPLUStruedecomp');

        % ---- draw a random sample from this distribution -----------------
        RDinput             = struct();
        RDinput.T           = T_acc;
        RDinput.Npeople     = 10^6;
        RDinput.random_draw = 1;
        RDinput.TU          = T_acc;
        RDinput.t1grid      = 0:1:T_acc;
        RDinput.t2grid      = RDinput.t1grid;

        RDinput.g_vec       = GPLUStrue.g_vec;
        RDinput.mu_vec      = GPLUStrue.mu_vec;
        RDinput.sigma_vec   = GPLUStrue.sigma_vec;

        RDoutput = fct_draw_from_IG_mixture(RDinput);

        % ---- re-estimate on the simulated sample -------------------------
        DATAinput_acc               = struct();
        DATAinput_acc.p1            = RDoutput.p1;
        DATAinput_acc.p2            = RDoutput.p2;
        DATAinput_acc.data          = RDoutput.data;
        DATAinput_acc.T             = RDinput.T;
        DATAinput_acc.lambda_smooth = 0; % no smoothing is applied to the simulated sample
        DATAinput_acc.filename_out  = sprintf('accuracy_sample%d_RP.mat',acc_case);

        output_main = fct_IGpaper_estimation_main(DATAinput_acc,MDinput,EMinput);
    end

%==========================================================================
% FIGURES AND TABLES
    % Start from a clean console, workspace and figure set
    clc
    clear all
    close all

    % Flag passed to the figure/table routine:
    % 1 = produce text output files for loading into LaTeX
    write_latex_files = 1;

    fct_paper_figures_tables_RP(write_latex_files)